Prelim set-up

rm(list = ls())
source("matrices.R")
source("agents.R")
library(parallel)
library(stats)
library(knitr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(rrrsa)
knitr::opts_chunk$set(cache=TRUE,
                      warning=FALSE, message=FALSE)

Helpers

Helper functions of data pre-processing and combining (from sims.R).

#####
##### normalizeBy()
##### -------------
##### normaze matrix by rows or cols, maintain naming
#####
normalizeBy <- function(m, rows = TRUE) {
  colNames <- colnames(m)
  rowNames <- rownames(m)
  normedM <- apply(m, MARGIN = ifelse(rows, 1, 2), rrrsa::rsa.normVec)
  colnames(normedM) <- colNames
  rownames(normedM) <- rowNames
  normedM
}

#####
##### inGlobalEnv()
##### -------------
##### boolean object presence in global env
#####
inGlobalEnv <- function(item) {
  item %in% ls(envir = .GlobalEnv)
}

#####
##### getLiteralSemantics()
##### ---------------------
##### literal semantics mapper between 
##### normalized matrices and dataframe
##### called in mutate() within dplyr chain
#####
getLiteralSemantics <- function(matrixName, object, query) {
  if (!(inGlobalEnv("simpleM") | inGlobalEnv("complexM") | inGlobalEnv("oddmanM") | inGlobalEnv("twinsM"))) {
    stop("Missing normalized matrices, please run data pre-processing block")
  }
  
  if (matrixName == "simple") {
    simpleM[as.character(object), as.character(query)]
  } else if (matrixName == "complex") {
    complexM[as.character(object), as.character(query)]
  } else if (matrixName == "oddman") {
    oddmanM[as.character(object), as.character(query)]
  } else if (matrixName == "twins") {
    twinsM[as.character(object), as.character(query)]
  } else {
    stop("Error in getLiteralSemantics()")
  }
}

#####
##### completeQueries()
##### ---------------------
##### add "query" words for each expt
##### this is necessary for running with rrrsa::runDf
#####
completeQueries <- function(d) {
  ## store data
  newD <- d
  currMatrix <- unique(as.character(d$matrix))
  
  ## meta-data
  allQueries <- c("glasses", "hat", "mustache")
  allObjects <- c("foil", "logical", "target")
  
  ## The current query
  currQuery <- unique(as.character(d$query))
  neededQueries <- setdiff(allQueries, currQuery)
  
  ## Add in new queries
  for (q in neededQueries) {
    df <- data.frame(cond = as.vector(d$cond),
                     expt = as.vector(d$expt),
                     matrix = as.vector(d$matrix),
                     prior = as.vector(d$prior),
                     query = rep(q, 3),
                     object = as.vector(d$object),
                     count = rep(NA, 3),
                     p = rep(NA, 3),
                     n = as.vector(d$n),
                     cil = rep(NA, 3), 
                     cih = rep(NA, 3),
                     priorType = as.vector(d$priorType),
                     priorValue = as.vector(d$priorValue),
                     grouper = as.vector(d$grouper))
    newD <- plyr::rbind.fill(newD, df)
    if (currMatrix == "simple") return(newD)
  }
  newD
}
# undebug(completeQueries)
# debug(completeQueries)
# completeQueries(baserate_63)

#####
##### convertListToDf()
##### -----------------
##### Convert a list where each element represents
##### data for a different grouping variable
##### useful with simulations run below
#####
convertListToDF <- function(li, rows = TRUE) {
  df <- data.frame()
  nItems <- seq(1, length(li))
  if (rows) {
    for (i in nItems) {
      df <- rbind(df, as.data.frame(li[[i]]))
    }
  } else {
    df <- data.frame(matrix(nrow = length(li[[1]]), ncol = 1))
    for (i in nItems) {
      df <- cbind(df, as.data.frame(li[[i]]))
    }
    return(df[, -1])
  }
  df
}

#####
##### rrrsa_ready_d
##### -------------
##### process data for use with rrrsa
##### NOTE :: this is not a generic fn
##### an makes certain assumptions about column naming
##### (eg. colnames -> object, expt, n, matrix)
#####
rrrsa_ready_df <- function(d, foil_name = "foil", logical_name = "logical", target_name = "target") {
  ## Initial pre-processing before adding in new cols and then adding literal semantics
  processedD <- d %>%
    ## Gather priors - we're only interested in prior for current object (need to use NSE to make
    ## compatible with boostrapped priors)
    gather_("priorType", "priorValue", c(foil_name, logical_name, target_name))
  processedD <- processedD %>%
    ## Filter out any mismatches between current object and priorType
    ## Do rowwise with grep so that we can use with boostrap naming conventions
    rowwise %>%
    filter(grepl(object, priorType)) %>%
    rowwise %>%
    ## Grouping variable concatenates experiment type and sample size (expt-n)
    mutate(grouper = paste0(as.character(expt), "-", as.character(n)))
  
  fullData <- plyr::ddply(processedD, .variables = ("grouper"), .fun = completeQueries) %>%
    rowwise %>%
    ## Each row gets a unique literal semantics from getLiteraSemantics() helper
    mutate(speaker.p = getLiteralSemantics(matrix, query = query, object = object)) %>%
    arrange(expt, n)
  
  fullData
}

Global data / settings

set.seed(1)
ALPHAS <- seq(0, 5, by = 0.1)
DEPTHS <- seq(0, 5)

Read in data

## This is `d` df from line 128 in sims.Rmd
d <- read.csv("data/models.csv")

Matrices from matrices.R

## Renamed, normalized matrices from matrices.R
simpleM <- normalizeBy(simple, rows = FALSE)
complexM <- normalizeBy(complex, rows = FALSE)
twinsM <- normalizeBy(twins, rows = FALSE)
oddmanM <- normalizeBy(oddman, rows = FALSE)

Data pre-processing

NOTE:: To work with rrrsa::runDf we need each matrix to include the literal semantics of all the queries (even if they weren’t actually queried) Look at group ‘baserate-63’: only ‘glasses’ is queried, but we need to add the literal semantics for ‘hat’ and ‘mustache’ as well.

fullData <- rrrsa_ready_df(d)

Helper if we want to avoid using rsa.tuneDepthAlpha()

## tune_depth_alpha()
## ------------------
## custom tuning if we don't want to use rsa.tuneDepthApha()
tune_depth_alpha <- function(d,
                             depths,
                             alphas,
                             usePrior,
                             usePriorEveryRecurse) {
  res <- data.frame(r=c(), alpha=c(), depth=c())
  for (depth in depths) {
    for (alpha in alphas) {
      ## Prior
      if (usePrior) {
        curr_out <- d %>%
        split(.$grouper) %>%
        purrr::map_df(~rsa.runDf(data=.x,
                                 quantityVarName="object",
                                 semanticsVarName="speaker.p",
                                 itemVarName="query",
                                 priorsVarName="priorValue",
                                 alpha=alpha,
                                 depth=depth,
                                 usePriorEveryRecurse=userPriorEveryRecurse)) %>%
        rowwise %>%
        mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
               depth = depth,
               alpha=alpha) %>%
        filter(tuningData == "keep")
      }
      ## No prior
      else {
        curr_out <- d %>%
        split(.$grouper) %>%
        purrr::map_df(~rsa.runDf(data=.x,
                                 quantityVarName="object",
                                 semanticsVarName="speaker.p",
                                 itemVarName="query",
                                 alpha=alpha,
                                 depth=depth,
                                 usePriorEveryRecurse=userPriorEveryRecurse)) %>%
        rowwise %>%
        mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
               depth = depth,
               alpha=alpha) %>%
        filter(tuningData == "keep")
      }
      res <- rbind(res, data.frame(r=cor(curr_out$p, curr_out$preds), alpha=alpha, depth=depth))
    }
  }
  res
}

Testing custom tune_depth_alpha()

tune1 <- tune_depth_alpha(fullData, DEPTHS, ALPHAS, FALSE)
tune1 %>%
  summarise(max_r=max(r),
            max_alpha=alpha[which.max(r)],
            max_depth=depth[which.max(r)])

Simulations WITH ‘Private Knowledge priors’ (recursive depth 0-5)

“Private knowledge” priors are only multiplied at the bottom level recursion.

Recursive depth sims

# sims_prior <- tune_depth_alpha(fullData, depths=DEPTHS, alphas=1, usePrior=TRUE, usePriorEveryRecurse=FALSE)
## Pre-sort by grouper
fullData <- fullData %>% arrange(grouper)
sims_prior_temp_pk <- lapply(DEPTHS, function(depth) {
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
         depth = depth) %>%
  filter(tuningData == "keep")
})
sims_prior_pk <- convertListToDF(sims_prior_temp_pk)

## written with purrr
##
# sims_prior_temp2 <- lapply(DEPTHS, function(depth) {
#   # plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
#   #           quantityVarName = "object", semanticsVarName = "speaker.p",
#   #           itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
#   fullData %>%
#     split(.$grouper) %>%
#     purrr::map_df(~rsa.runDf(data=.x,
#                              quantityVarName="object",
#                              semanticsVarName="speaker.p",
#                              itemVarName="query",
#                              priorsVarName="priorValue",
#                              depth=depth,
#                              usePriorEveryRecurse=FALSE)) %>%
#   rowwise %>%
#   mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
#          depth = depth) %>%
#   filter(tuningData == "keep")
# })
# sims_prior2 <- convertListToDF(sims_prior_temp2)

Simulation plots

Recursive depth 0-2

sims_prior_pk %>%
  filter(depth < 3) %>%
  ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
    geom_pointrange(aes(ymin = cil, ymax = cih)) + 
    xlim(c(0,1)) + ylim(c(0,1)) + 
    facet_wrap(~depth) +
    ylab("Proportion choosing target") + 
    xlab("Model Predictions") + 
    geom_smooth(method="lm", aes(group=1)) + 
    geom_abline(slope = 1, intercept = 0, lty=2) +
    ggtitle("Private knowledge priors model sims, alpha = 1") + theme_bw()

Recursive depth 0-5

ggplot(sims_prior_pk, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Private knowledge priors model sims, alpha = 1") + theme_bw()

Correlations

print("Private knowledge priors models, alpha held at 1, varied depth")
## [1] "Private knowledge priors models, alpha held at 1, varied depth"
cors_pk <- sims_prior_pk %>%
  group_by(depth) %>%
  summarise("cor" = cor(p, preds)) %>%
  mutate("alpha" = 1)
cors_pk %>%
  kable()
depth cor alpha
0 0.7357369 1
1 0.8581523 1
2 0.9281867 1
3 0.9472652 1
4 0.9523711 1
5 0.9528916 1

Fitting alpha

## Need to identify indices we want for performance comparisons
fullData <- fullData %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices  <- which(fullData$tuningData == "keep")

## Parallelize fitting
priorFit_pk <- mclapply(DEPTHS, function(i) { ## parallelize over depths
  rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
                     semanticsVarName = "speaker.p", itemVarName = "query",
                     priorsVarName = "priorValue", groupName = "grouper",
                     compareDataName = "p", compareIndices = compareIndices,
                     depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
  },
  mc.cores = detectCores())
aggregateTuneData_pk <- convertListToDF(priorFit_pk)

Model performance

## Get best model performance by alpha, depth
maxCors_pk <- aggregateTuneData_pk %>%
  group_by(depth) %>%
  summarize(maxCor = max(cor),
            alpha = alpha[which.max(cor)])

Plot performance tuning

aggregateTuneData_plot_pk <- aggregateTuneData_pk %>%
  mutate(Recursive_depth = as.factor(depth))

## Plot tuning (Plot adapted for personal_site)
ggplot(aggregateTuneData_plot_pk, aes(x = alpha, y = cor, col=Recursive_depth)) +
  geom_point(alpha=0.6, size=4) +
  geom_vline(xintercept=maxCors_pk$alpha, col="grey", lty="dotdash")+
  ylab("Correlation") +
  scale_x_continuous(breaks = round(seq(0, 6, by = 0.1)), "Rationality (alpha level)") +
  # theme_solarized() +
  theme_bw() +
  theme(legend.position="bottom", legend.box="horizontal",
        legend.text=element_text(size=8), legend.direction="horizontal") 

Best priors model performance with fitted alphas

depths_param_data_pk <- maxCors_pk %>%
  arrange(depth)
best_prior_models_fitted_alpha_pk <- convertListToDF(lapply(DEPTHS, function(depth) {
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
              quantityVarName = "object", semanticsVarName = "speaker.p",
              priors = "priorValue", usePriorEveryRecurse = FALSE,
              itemVarName = "query", depth = depths_param_data_pk$depth[depth + 1],
              alpha = depths_param_data_pk$alpha[depth + 1]) %>%
    mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
           depth = depth) %>%
    filter(tuningData == "keep")
  }))
## Quick check
## cor(best_prior_models_fitted_alpha[best_prior_models_fitted_alpha$depth == 1, ]$p, best_prior_models_fitted_alpha[best_prior_models_fitted_alpha$depth == 1, ]$preds)

Plot best model performance with fitted alphas

print("Private knowledge priors model performance for recursive depth 0-5 with fitted alphas")
## [1] "Private knowledge priors model performance for recursive depth 0-5 with fitted alphas"
maxCors_pk %>% kable() ## performance data
depth maxCor alpha
0 0.7357369 0.0
1 0.9453030 3.1
2 0.9522229 1.4
3 0.9527394 1.1
4 0.9523711 1.0
5 0.9528916 1.0
## plots
ggplot(best_prior_models_fitted_alpha_pk, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth, labeller = label_both) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Private knowledge priors model performance for recursive depth 0-5 with fitted alphas") + theme_bw()

Simulations WITH ‘Common knowledge priors’ (recursive depth 0-5)

“Common knowledge” priors are included at every level of recursion.

Recursive depth sims

## Pre-sort by grouper
fullData <- fullData %>% arrange(grouper)
sims_prior_temp_ck <- lapply(DEPTHS, function(depth) {
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = TRUE) %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
         depth = depth) %>%
  filter(tuningData == "keep")
})
sims_prior_ck <- convertListToDF(sims_prior_temp_ck)

Simulation plots

Recursive depth 0-2

sims_prior_ck %>%
  filter(depth < 3) %>%
  ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
    geom_pointrange(aes(ymin = cil, ymax = cih)) + 
    xlim(c(0,1)) + ylim(c(0,1)) + 
    facet_wrap(~depth) +
    ylab("Proportion choosing target") + 
    xlab("Model Predictions") + 
    geom_smooth(method="lm", aes(group=1)) + 
    geom_abline(slope = 1, intercept = 0, lty=2) +
    ggtitle("Common knowledge priors model sims, alpha = 1") + theme_bw()

Recursive depth 0-5

ggplot(sims_prior_ck, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Common knowledge priors model sims, alpha = 1") + theme_bw()

Correlations

print("Common knowledge priors models, alpha held at 1, varied depth")
## [1] "Common knowledge priors models, alpha held at 1, varied depth"
cors_ck <- sims_prior_ck %>%
  group_by(depth) %>%
  summarise("cor" = cor(p, preds)) %>%
  mutate("alpha" = 1)
cors_ck %>%
  kable()
depth cor alpha
0 0.7357369 1
1 0.8581523 1
2 0.8687076 1
3 0.8720136 1
4 0.8726817 1
5 0.8723590 1

Fitting alpha

## Need to identify indices we want for performance comparisons
fullData <- fullData %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices  <- which(fullData$tuningData == "keep")

## Parallelize fitting
priorFit_ck <- mclapply(DEPTHS, function(i) { ## parallelize over depths
  rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
                     semanticsVarName = "speaker.p", itemVarName = "query",
                     priorsVarName = "priorValue", groupName = "grouper",
                     compareDataName = "p", compareIndices = compareIndices,
                     depth = i, alphas = ALPHAS, usePriorEveryRecurse = TRUE)
  },
  mc.cores = detectCores())
aggregateTuneData_ck <- convertListToDF(priorFit_ck)

Model performance

## Get best model performance by alpha, depth
maxCors_ck <- aggregateTuneData_ck %>%
  group_by(depth) %>%
  summarize(maxCor = max(cor),
            alpha = alpha[which.max(cor)])

Plot performance tuning

aggregateTuneData_plot_ck <- aggregateTuneData_ck %>%
  mutate(Recursive_depth = as.factor(depth))

## Plot tuning (Plot adapted for personal_site)
ggplot(aggregateTuneData_plot_ck, aes(x = alpha, y = cor, col=Recursive_depth)) +
  geom_point(alpha=0.6, size=4) +
  geom_vline(xintercept=maxCors_ck$alpha, col="grey", lty="dotdash")+
  ylab("Correlation") +
  scale_x_continuous(breaks = round(seq(0, 6, by = 0.1)), "Rationality (alpha level)") +
  # theme_solarized() +
  theme_bw() +
  theme(legend.position="bottom", legend.box="horizontal",
        legend.text=element_text(size=8), legend.direction="horizontal") 

Best priors model performance with fitted alphas

depths_param_data_ck <- maxCors_ck %>%
  arrange(depth)
best_ck_prior_models_fitted_alpha <- convertListToDF(lapply(DEPTHS, function(depth) {
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
              quantityVarName = "object", semanticsVarName = "speaker.p",
              priors = "priorValue", usePriorEveryRecurse = FALSE,
              itemVarName = "query", depth = depths_param_data_ck$depth[depth + 1], alpha = depths_param_data_ck$alpha[depth + 1]) %>%
    mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
           depth = depth) %>%
    filter(tuningData == "keep")
  }))

Plot best common knowldege priors model performance with fitted alphas

print("Common knowledge priors performance for recursive depth 0-5 with fitted alphas")
## [1] "Common knowledge priors performance for recursive depth 0-5 with fitted alphas"
maxCors_ck ## performance data
## # A tibble: 6 × 3
##   depth    maxCor alpha
##   <dbl>     <dbl> <dbl>
## 1     0 0.7357369   0.0
## 2     1 0.9453030   3.1
## 3     2 0.9180203   2.0
## 4     3 0.9138164   3.3
## 5     4 0.9087468   3.0
## 6     5 0.9007427   2.9
## plots
ggplot(best_ck_prior_models_fitted_alpha, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth, labeller = label_both) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Common knowledge priors model performance for recursive depth 0-5 with fitted alphas") + theme_bw()

Simulations WITHOUT priors (recursive depth 0-5)

Recursive depth sims

sims_NoPrior_temp <- lapply(DEPTHS, function(depth) {
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query", depth = depth) %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
         depth = depth) %>%
  filter(tuningData == "keep")
})
sims_NoPrior <- convertListToDF(sims_NoPrior_temp)

Simulation plots

Recursive depth 0-2

sims_NoPrior %>%
  filter(depth < 3) %>%
  ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
    geom_pointrange(aes(ymin = cil, ymax = cih)) + 
    xlim(c(0,1)) + ylim(c(0,1)) + 
    facet_wrap(~depth) +
    ylab("Proportion choosing target") + 
    xlab("Model Predictions") + 
    geom_smooth(method="lm", aes(group=1)) + 
    geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()

Recursive depth 0-5

ggplot(sims_NoPrior, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2)+ theme_bw()

Correlations

print("No priors models, alpha held at 1, varied depth")
## [1] "No priors models, alpha held at 1, varied depth"
cors_noPriors <- sims_NoPrior %>%
  group_by(depth) %>%
  summarise("cor" = cor(p, preds)) %>%
  mutate("alpha" = 1)
cors_noPriors %>%
  kable()
depth cor alpha
0 0.7357369 1
1 0.9425637 1
2 0.9505864 1
3 0.9474159 1
4 0.9436402 1
5 0.9404205 1

Fitting alpha

## Check that we have fullData and compareIndices for tuning
if (!(inGlobalEnv("fullData") & inGlobalEnv("compareIndices"))) {
  fullData <- fullData %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
  keepIndices <- c("keep", "throwout")
  compareIndices  <- which(fullData$tuningData == "keep")  
}

## Parallelize fitting
noPriorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
  rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
                     semanticsVarName = "speaker.p", itemVarName = "query",
                     compareIndices = compareIndices, compareDataName = "p",
                     groupName = "grouper",
                     depth = i, alphas = ALPHAS)
  },
  mc.cores = detectCores())
aggregateTuneData_noPriors <- convertListToDF(noPriorFit)

Model performance

## Get best model performance by alpha, depth
maxCors_noPriors <- aggregateTuneData_noPriors %>%
  group_by(depth) %>%
  summarize(maxCor = max(cor),
            alpha = alpha[which.max(cor)])

Plot performance tuning

## Plot tuning
ggplot(aggregateTuneData_noPriors, aes(x = alpha, y = cor, col = as.factor(depth))) +
  geom_point() +
  ylim(0.75, 1) +
  geom_vline(xintercept = maxCors_noPriors$alpha, lty = 4, col = "grey50") +
  scale_x_continuous(breaks = round(seq(1, 6, by = 0.1)), "alpha") +
  ggtitle("Model performance - no priors") + theme_bw()

Best NO-priors model performance with fitted alphas

depths_param_data <- maxCors_noPriors %>%
  arrange(depth)
best_noPrior_models_fitted_alpha <- convertListToDF(lapply(DEPTHS, function(depth) { ## parallelize over depths
  plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
              quantityVarName = "object", semanticsVarName = "speaker.p",
              usePriorEveryRecurse = FALSE, itemVarName = "query",
              depth = depths_param_data$depth[depth + 1], alpha = depths_param_data$alpha[depth + 1]) %>%
    mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
           depth = depth) %>%
    filter(tuningData == "keep")
  }))
## Quick check
## cor(best_noPrior_models_fitted_alpha[best_noPrior_models_fitted_alpha$depth == 1, ]$p, best_noPrior_models_fitted_alpha[best_noPrior_models_fitted_alpha$depth == 1, ]$preds)

Plot best No priors model performance with fitted alphas

print("Model performance for recursive depth 0-5 with fitted alphas no priors")
## [1] "Model performance for recursive depth 0-5 with fitted alphas no priors"
maxCors_noPriors ## performance data
## # A tibble: 6 × 3
##   depth    maxCor alpha
##   <dbl>     <dbl> <dbl>
## 1     0 0.7357369   0.0
## 2     1 0.9457362   1.5
## 3     2 0.9513002   0.9
## 4     3 0.9520095   0.8
## 5     4 0.9520683   0.7
## 6     5 0.9521724   0.7
## plots
ggplot(best_noPrior_models_fitted_alpha, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~depth, labeller = label_both) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("No-prior model performance for recursive depth 0-5 with fitted alphas") + theme_bw()

Priors comparison

Combine fitted alphas data

no_fit_alphas_all_models <- rbind(
  cors_pk %>% mutate(prior_type="pk"),
  cors_ck %>% mutate(prior_type="ck"),
  cors_noPriors %>% mutate(prior_type="none")
)
fit_alphas_all_models <- rbind(
  maxCors_pk %>% mutate(prior_type="pk"),
  maxCors_ck %>% mutate(prior_type="ck"),
  maxCors_noPriors %>% mutate(prior_type="none")
)

all_cors_d <- merge(no_fit_alphas_all_models[,which(names(no_fit_alphas_all_models) != "alpha")], fit_alphas_all_models) %>%
  gather(type, r, c(cor, maxCor)) %>%
  mutate(alpha=ifelse(type=="cor", 1.0, alpha))
ggplot(all_cors_d, aes(x=as.numeric(depth), y=r, col=prior_type)) +
  geom_line() +
  facet_wrap(~type) +
  theme_bw()

ggplot(all_cors_d, aes(x=as.numeric(depth), y=r, col=prior_type)) +
  geom_point(aes(size=alpha), alpha=0.4) +
  facet_wrap(~type) +
  theme_bw()

merge(no_fit_alphas_all_models[,which(names(no_fit_alphas_all_models) != "alpha")], fit_alphas_all_models) %>% 
  rename(fitted_alpha=alpha,
         unfitted_cor=cor,
         fitted_cor=maxCor) %>%
  kable()
depth prior_type unfitted_cor fitted_cor fitted_alpha
0 ck 0.7357369 0.7357369 0.0
0 none 0.7357369 0.7357369 0.0
0 pk 0.7357369 0.7357369 0.0
1 ck 0.8581523 0.9453030 3.1
1 none 0.9425637 0.9457362 1.5
1 pk 0.8581523 0.9453030 3.1
2 ck 0.8687076 0.9180203 2.0
2 none 0.9505864 0.9513002 0.9
2 pk 0.9281867 0.9522229 1.4
3 ck 0.8720136 0.9138164 3.3
3 none 0.9474159 0.9520095 0.8
3 pk 0.9472652 0.9527394 1.1
4 ck 0.8726817 0.9087468 3.0
4 none 0.9436402 0.9520683 0.7
4 pk 0.9523711 0.9523711 1.0
5 ck 0.8723590 0.9007427 2.9
5 none 0.9404205 0.9521724 0.7
5 pk 0.9528916 0.9528916 1.0

Seems like Common Knowledge prior underperforms except at the single level of recursion. Of course, results in this case are the same as the Private Knowledge prior, because the prior is included only once…

Explicit comparisons with MF sims.Rmd

Combine priors and unif priors

## combine priors and uniform
sims_prior <- sims_prior_pk %>%
  mutate(prior = "prior")
sims_NoPrior <- sims_NoPrior %>%
  mutate(prior = "uniform")
allData <- rbind(sims_prior, sims_NoPrior) %>%
  mutate(model = paste0("L", depth)) %>%
  arrange(prior, query, object, matrix, cond, expt, model)

Compare with MF data

## Compare with MFs data
mfD <- read.csv("data/modelRuns.csv") %>%
  arrange(prior, query, object, matrix, cond, expt, model)

## Add in MF preds check correlation
allData$mdPreds <- mfD$prediction

ggplot(allData, aes(x = preds, y = mdPreds, fill = matrix, size = depth)) +
    xlab("rrrsa predictions") +
    ylab("MF predictions") +
    geom_point(shape = 21, alpha = 0.30) + theme_bw()

# ggsave("pragmods.png", path = "~/Desktop/", width=7, height=3)

Propagating errors into model

Bootstrap se helpers

Helpers - calculating bootstrapepd CIs

#####
##### se()
##### ----------------------
##### given a numeric vector calcuate std error :: sqrt(var(v))
#####
se <- function(v) {
  sqrt(var(v))
}

#####
##### ci()
##### ----------------------
##### given a numeric vector calculate confidence intervals of width, `width`
#####
ci <- function(mu, sd, n, width = 0.95) {
  interval <- (1 - width) / 2
  error <- (qnorm(1 - interval) * sd) / sqrt(n)
  return(c("bstrp_mu" = mu, "bstrp_err" = error))
}

#####
##### get_se()
##### ----------------------
##### given a df of bootstrapped estimates return std error
#####
get_se <- function(boot_ests) {
  apply(as.matrix(boot_ests), 1, se)  
}

#####
##### get_ci()
##### ----------------------
##### given a df of bootstrapped estimates return `width` confidence intervals
#####
get_ci <- function(boot_ests, width = 0.95) {
  apply(as.matrix(boot_ests), 1, FUN = function(ests) {
    mu <- mean(ests)
    sd <- sd(ests)
    n <- length(ests)
    return(ci(mu, sd, n, width))
  })
}

#####
##### boot_vec()
##### -------------
##### multinomial bootstrap
##### given vector of counts return n boostrap 
##### estimates of proportions
#####
boot_vec <- function(countsVec, n = 1) {
  t(normalizeBy(rmultinom(n, sum(countsVec), countsVec), rows = FALSE))
}
#####
##### pragmods_priors_boot()
##### ----------------------
##### custom pragmods wrapper to boot_priors()
##### used with mapply() to create new prior estimates
#####
pragmods_priors_boot <- function(c1, c2, c3, n = 1) {
  boot_vec(c("foil_cnts" = c1, "logical_cnts" = c2, "target_cnts" = c3), n)
}

#####
##### add_booted_priors()
##### ----------------------
##### given a df with prior counts for 'foil', 'logical' and 'target'
##### return new df with n = 1 boostrap estimates of priors
##### NOTE :: like pragmods_priors_boot this is not a generic function -
##### assumes naming convnetions established here ('foil_cnts', 'logical_cnts', 'target_cnts')
#####
add_booted_priors <- function(df) {
  res <- convertListToDF(with(df, mapply(pragmods_priors_boot, foil_cnts, logical_cnts, target_cnts, SIMPLIFY = FALSE)))
  names(res) <- c("foil_bt", "logical_bt", "target_bt")
  cbind(df, res)  
}

#####
##### run_boot_sims()
##### ---------------------
##### Run `n_sims` simulations with `alhpa`, `depth` 
##### Expects a df `d_prior_counts` that contains prior counts data
#####
run_boot_sims <- function(d_prior_counts, alpha = 1, depth = 1, n_sims = 100, verbose = TRUE) {
  ## Display run
  if (verbose) {
    cat(paste("Running ", n_sims, " simulations with alpha = ", alpha, ""))
  }
  
  ## Store n boostrapped simulations 
  bstrap_sims <- mclapply(seq(1, n_sims), FUN = function(i) {
    ## Convert to rrrsa df
    d_booted_priors <-
      rrrsa_ready_df(add_booted_priors(d_prior_counts),
                     foil_name = "foil_bt", logical_name = "logical_bt", target_name = "target_bt")
    ## Run rrrsa
    res <- plyr::ddply(d_booted_priors, .variables = c("grouper"), rsa.runDf,
              quantityVarName = "object", semanticsVarName = "speaker.p",
              itemVarName = "query", alpha = alpha, depth = depth,
              priorsVarName = "priorValue", usePriorEveryRecurse = FALSE) %>%
    rowwise %>%
    mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
           depth = depth) %>%
    filter(tuningData == "keep")
    ## Return predictions
    res$preds
  }, 
  mc.cores = detectCores())
  
  ## Convert simulations to df
  pred_sims <- convertListToDF(bstrap_sims, rows = FALSE)
  pred_sims
}

Boostrapped runs

Prior counts data

## Get prior counts data (from sims.R)
priorCounts <- read.csv("data/prior_counts.csv")
names(priorCounts)[1:3] <- c("foil_cnts", "logical_cnts", "target_cnts")
## Merge with our original data set
d_prior_counts <- merge(d, priorCounts, by = "prior", all = TRUE)

Boostrapped data at depth == 5

Let’s lookat at bootstrapped levels at a single level of recursion. These next two code blocks are useful for verifying that were measuring the correct quantitites.

depth <- 4
d5_bstrap_sims <- run_boot_sims(d_prior_counts, depth = depth, n_sims = 100, verbose = FALSE)

Plot d5 strap sims

d5_bstrap_sims$simNum <- seq(1, nrow(d5_bstrap_sims))
d5_bstrap_sims_long <- d5_bstrap_sims %>%
  gather(itNum, val, -simNum) %>%
  select(simNum, val)

## How do the sims look (apprx normally distr)?
d5_bstrap_sims_long %>%
  filter(val != 1 & val != 0 & val != 0.5) %>%
  ggplot(aes(val, fill = simNum)) +
    geom_density() +
    facet_wrap(~simNum) + theme_bw()

Calc d5 bstrap cis

d5_prior_model_sims <- sims_prior[which(sims_prior$depth == depth), ]
errs <- data.frame()
for (sim_row in seq(1, nrow(d5_bstrap_sims))) {
    ## Get diff with our estimate
    diffs <- d5_prior_model_sims$preds[sim_row] - unlist(d5_bstrap_sims[sim_row, ])
    errs <- rbind(errs, diffs)
}
cis <- data.frame(t(apply(errs, MARGIN = 1, FUN = quantile, c(0.05, 0.95))))
names(cis) <- c('low', 'hi')

d5_bstrap_cis <- cbind(d5_prior_model_sims, cis)
## Need to add bounds to CIs
d5_bstrap_cis <- d5_bstrap_cis %>%
  mutate(ci_low = preds - hi,
         ci_hi = preds - low)

Plot d5 bootstrap CIs

ggplot(d5_bstrap_cis, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_errorbarh(aes(xmin = ci_hi, xmax = ci_low, height = 0.05)) +
  geom_errorbar(aes(ymin = cil, ymax = cih, width = 0.05)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  facet_wrap(~depth) +
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle(paste0("Priors model, depth = ", as.character(depth), ", bootatrapped 95% CIs")) + theme_bw()

Boostrapped data - all DEPTHS and calculate CIs

## Run simulations for each depth
sims_prior_bstrap_errs <- read.csv("data/sims_prior_bstrap_errs.csv") ## From previous run with set.seed(1)

## Run below code if we don't have saved previous sims
## ---------------------------------------------------
if (!inGlobalEnv("sims_prior_bstrap_errs")) {
  depth_sims <- mclapply(DEPTHS, FUN = function(depth) {
      run_boot_sims(d_prior_counts, depth = depth, n_sims = 500, verbose = FALSE)
      }, mc.cores = detectCores()) 
   
  ## For each simulation level
  cis <- data.frame() ## df to hold CIs
  
  # fn <- function() {
  for (depth in seq(1, length(depth_sims))) {
    ## Current depth data
    curr_data <- data.frame(depth_sims[depth])
    errs <- data.frame() ## Hold ci diffs
    ## For each simulation (row) in the current data frame
    for (sim_row in seq(1, nrow(curr_data))) {
      ## Get diff with our estimate
      diffs <-  sims_prior[which(sims_prior$depth == depth - 1), ]$preds[sim_row] - unlist(curr_data[sim_row, ])
      errs <- rbind(errs, diffs)
    }
    cis <- rbind(cis, data.frame(t(apply(errs, MARGIN = 1, FUN = quantile, c(0.05, 0.95)))))
  }
  # }
  # debug(fn)
  # undebug(fn)
  # cis <- fn()

  names(cis) <- c('low', 'hi')
  sims_prior_bstrap_errs <- cbind(sims_prior, cis)
  ## Need to add bounds to CIs
  sims_prior_bstrap_errs <- sims_prior_bstrap_errs %>%
    mutate(ci_low = preds - hi,
           ci_hi = preds - low)
}
# write.csv(sims_prior_bstrap_errs, "sims_prior_bstrap_errs.csv")

Plot with boostrapped errs - all depths

ggplot(sims_prior_bstrap_errs, aes(x = preds, y = p, col = expt, pch = object)) +
  geom_errorbarh(aes(xmin = ci_hi, xmax = ci_low, height = 0.05)) +
  geom_errorbar(aes(ymin = cil, ymax = cih, width = 0.05)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  facet_wrap(~depth) +
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Priors model, bootstrapped (500 sims) 95% CIs") + theme_bw()

Why do we see more variance with higher recursive depth?

sims_prior_bstrap_errs %>%
  mutate(ci_width = hi - low,
         type = paste0(cond, "-", expt)) %>%
  ggplot(aes(x = depth, y = ci_width, col = type)) +
  geom_point() + theme_bw()

Analysis by experiment intent (study priors vs recursive depth)

Data for prior vs depth expt types

depth_tests <- c("complex", "oddman", "simple", "twins")
priors_tests <- c("baserate", "color", "lang")
full_data_type <- fullData %>%
  mutate(type = ifelse(expt %in% depth_tests, "depth_test", "priors_test"))
full_data_priors_tests <- full_data_type %>% filter(type == "priors_test")
full_data_depth_tests <- full_data_type %>% filter(type == "depth_test")

What are effects of priors on prior oriented expts

full_data_priors_tests <- full_data_priors_tests %>% arrange(grouper)
full_data_priors_tests <- full_data_priors_tests %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices  <- which(full_data_priors_tests$tuningData == "keep")

Prior vs noPrior model preds

prior_d <- plyr::ddply(full_data_priors_tests, .variables = c("grouper"), .fun = rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query", priorsVarName = "priorValue",
            usePriorEveryRecurse = FALSE) %>%
  filter(tuningData == "keep")

noPrior_d <- plyr::ddply(full_data_priors_tests, .variables = c("grouper"), .fun = rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query",
            usePriorEveryRecurse = FALSE) %>%
  filter(tuningData == "keep")


join_priors_noPriors <- cbind(prior_d, noPrior_d$preds)
names(join_priors_noPriors)[length(names(join_priors_noPriors))] <- 'noPrior_preds'
join_priors_noPriors <- join_priors_noPriors %>%
  gather(prior_type, value, c(preds, noPrior_preds))


ggplot(join_priors_noPriors, aes(x = value, y = p, col = prior_type)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(~prior) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + theme_bw()

#   geom_smooth(method="lm", aes(group=1)) + 
#   geom_abline(slope = 1, intercept = 0, lty=2) +
#   ggtitle("Model performance for recursive depth 0-5 with fitted alphas")  

Priors

## Parallelize fitting
priorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
  rsa.tuneDepthAlpha(full_data_priors_tests, quantityVarName = "object",
                     semanticsVarName = "speaker.p", itemVarName = "query",
                     priorsVarName = "priorValue", groupName = "grouper",
                     compareDataName = "p", compareIndices = compareIndices,
                     depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
  },
  mc.cores = detectCores())

aggregateTuneData_priors <- convertListToDF(priorFit)
aggregateTuneData_priors %>%
  group_by(depth) %>%
  summarize(maxCor = max(cor),
            alpha = alpha[which.max(cor)])
## # A tibble: 6 × 3
##   depth    maxCor alpha
##   <dbl>     <dbl> <dbl>
## 1     0 0.6312373   0.0
## 2     1 0.9470066   3.8
## 3     2 0.9469632   1.6
## 4     3 0.9465052   1.3
## 5     4 0.9469024   1.1
## 6     5 0.9459132   1.1

No Priors

noPriorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
  rsa.tuneDepthAlpha(full_data_priors_tests, quantityVarName = "object",
                     semanticsVarName = "speaker.p", itemVarName = "query",
                     groupName = "grouper",
                     compareDataName = "p", compareIndices = compareIndices,
                     depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
  },
  mc.cores = detectCores())

aggregateTuneData_noPriors <- convertListToDF(noPriorFit)
aggregateTuneData_noPriors %>%
  group_by(depth) %>%
  summarize(maxCor = max(cor),
            alpha = alpha[which.max(cor)])
## # A tibble: 6 × 3
##   depth    maxCor alpha
##   <dbl>     <dbl> <dbl>
## 1     0 0.6312373   0.0
## 2     1 0.9489123   1.9
## 3     2 0.9488210   1.0
## 4     3 0.9488951   0.8
## 5     4 0.9488517   0.8
## 6     5 0.9487172   0.8

How does alpha fitting respond to depth and priors

combined_tuning <- left_join(aggregateTuneData_priors, aggregateTuneData_noPriors, by = c("depth", "alpha"))
names(combined_tuning) <- c("priors", "depth", "alpha", "noPriors")
combined_tuning <- combined_tuning %>%
  gather(type, correlation, -c(depth, alpha))
ggplot(combined_tuning, aes(x = alpha, y = correlation, col = as.factor(type))) +
  geom_point() +
  facet_wrap(~depth) + 
  ggtitle("Impact of priors on model preds with and without priors") + theme_bw()

How does recursion help holding alpha and priors constant?

full_data_depth_tests <- full_data_depth_tests %>% arrange(grouper)
full_data_depth_tests <- full_data_depth_tests %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices  <- which(full_data_depth_tests$tuningData == "keep")

sims_prior_temp <- lapply(seq(0, 2), function(depth) {
  plyr::ddply(full_data_depth_tests, .variables = c("grouper"), rsa.runDf,
            quantityVarName = "object", semanticsVarName = "speaker.p",
            itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
  rowwise %>%
  mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
         depth = depth) %>%
  filter(tuningData == "keep")
})
sims_prior_temp <- convertListToDF(sims_prior_temp)
sims_prior_temp %>% filter(preds != 0, preds != 1) %>%
  ggplot(aes(x = preds, y = p, col = expt, pch = object)) +
  geom_pointrange(aes(ymin = cil, ymax = cih)) + 
  xlim(c(0,1)) + ylim(c(0,1)) + 
  facet_wrap(expt~depth, labeller = label_both, ncol = 3) +
  ylab("Proportion choosing target") + 
  xlab("Model Predictions") + 
  geom_smooth(method="lm", aes(group=1)) + 
  geom_abline(slope = 1, intercept = 0, lty=2) +
  ggtitle("Model performance for recursive depth 0-2 with fitted alphas") + theme_bw()

sims_prior_temp %>%
  group_by(expt, depth) %>%
  summarise(correlation = cor(preds, p))
## Source: local data frame [12 x 3]
## Groups: expt [?]
## 
##       expt depth correlation
##     <fctr> <int>       <dbl>
## 1  complex     0   0.8385561
## 2  complex     1   0.9611770
## 3  complex     2   0.9763472
## 4   oddman     0   0.9497125
## 5   oddman     1   0.9763249
## 6   oddman     2   0.9763249
## 7   simple     0   0.9001967
## 8   simple     1   0.9517313
## 9   simple     2   0.9898410
## 10   twins     0   0.9761129
## 11   twins     1   0.9970806
## 12   twins     2   0.9875595

Salience analysis

Plot preds by experiment

ggplot(allData, aes(x = preds, y = p, col = prior)) +
  geom_point(shape = 21, alpha = 0.85) +
  facet_grid(model ~ cond) + theme_bw()

Plot prior’s wonkiness

allData %>%
  filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
  ggplot(aes(x = preds, y = p, col = prior, shape = as.factor(object))) +
    geom_point(alpha = 0.5, size = 4) +
    facet_grid(model ~ grouper) +
    geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()

Almost looks like target and logical could be swapped… But with more recursions, the priors-model begins to improve…

Wonky priors vs unif priors

temp_d <- sims_prior %>% filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
  arrange(cond, expt, matrix, prior, object, count, p)

temp_d$unif_preds <- sims_NoPrior %>% filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
  arrange(cond, expt, matrix, prior, object, count, p) %>%
  mutate(unif_preds = preds) %>%
  select(unif_preds) %>%
  unlist()

temp_d <- temp_d %>%
  mutate(delta = preds - unif_preds) %>%
  arrange(-abs(delta))

## uniform vs priors
ggplot(temp_d, aes(x = unif_preds, y = preds, col = factor(grouper), size = factor(depth), shape = as.factor(object))) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()

## Take a look at the data
temp_d %>% kable()
cond expt matrix prior query object count p n cil cih priorType priorValue grouper speaker.p preds tuningData depth unif_preds delta
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.7000000 keep 1 0.2500000 0.4500000
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.3000000 keep 1 0.7500000 -0.4500000
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.5833333 keep 2 0.1666667 0.4166667
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.4166667 keep 2 0.8333333 -0.4166667
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.5000000 keep 3 0.1250000 0.3750000
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.5000000 keep 3 0.8750000 -0.3750000
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.4375000 keep 4 0.1000000 0.3375000
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.5625000 keep 4 0.9000000 -0.3375000
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.5774648 keep 1 0.2500000 0.3274648
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.4225352 keep 1 0.7500000 -0.3274648
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.6111111 keep 5 0.9166667 -0.3055556
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.3888889 keep 5 0.0833333 0.3055556
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.4505495 keep 2 0.1666667 0.2838828
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.5494505 keep 2 0.8333333 -0.2838828
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.6306306 keep 3 0.8750000 -0.2443694
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.3693694 keep 3 0.1250000 0.2443694
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.6870229 keep 4 0.9000000 -0.2129771
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.3129771 keep 4 0.1000000 0.2129771
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.2715232 keep 5 0.0833333 0.1881898
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.7284768 keep 5 0.9166667 -0.1881898
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 0 0.0000000 0.0000000
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 1 0.0000000 0.0000000
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 2 0.0000000 0.0000000
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 3 0.0000000 0.0000000
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 4 0.0000000 0.0000000
0.11 baserate simple prior glasses foil 6 0.0833333 72 0.0300468 0.1547736 foil 0.0377358 baserate-72 0.0 0.0000000 keep 5 0.0000000 0.0000000
0.11 baserate simple prior glasses logical 12 0.1666667 72 0.0893488 0.2582384 logical 0.7735849 baserate-72 0.5 0.5000000 keep 0 0.5000000 0.0000000
0.11 baserate simple prior glasses target 54 0.7500000 72 0.6464264 0.8429448 target 0.1886792 baserate-72 0.5 0.5000000 keep 0 0.5000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 0 0.0000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 1 0.0000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 2 0.0000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 3 0.0000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 4 0.0000000 0.0000000
foil color simple prior glasses foil 2 0.0425532 47 0.0034643 0.1142362 foil 0.4074074 color-47 0.0 0.0000000 keep 5 0.0000000 0.0000000
foil color simple prior glasses logical 6 0.1276596 47 0.0474016 0.2323648 logical 0.5185185 color-47 0.5 0.5000000 keep 0 0.5000000 0.0000000
foil color simple prior glasses target 39 0.8297872 47 0.7146994 0.9234288 target 0.0740741 color-47 0.5 0.5000000 keep 0 0.5000000 0.0000000

Still looks like these might have been swapped…

Wonky priors data

allData %>%
  filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
  mutate(delta = p - preds) %>%
  arrange(delta, model, grouper) %>%
  select(cond, delta, p, preds, grouper, query, object, model, priorType, priorValue)

Save this for future generations of rsa pirates

############################################################################
#### SAVE - ORIGINAL DATA PROCESSING
####
processedD <- d %>%
  ## Gather priors - we're only interested in prior for current object
  gather(priorType, priorValue, c(foil, logical, target)) %>%
  ## Filter out any mismatches between current object and priorType
  filter(object == priorType) %>%
  rowwise %>%
  ## Grouping variable concatenates experiment type and sample size (expt-n)
  mutate(grouper = paste0(as.character(expt), "-", as.character(n)))

fullData <- plyr::ddply(processedD, .variables = ("grouper"), .fun = completeQueries) %>%
  rowwise %>%
  ## Each row gets a unique literal semantics from getLiteraSemantics() helper
  mutate(speaker.p = getLiteralSemantics(matrix, query = query, object = object)) %>%
  arrange(expt, n)
####
#### SAVE - ORIGINAL DATA PROCESSING
############################################################################

# sims_prior %>%
#   mutate(errorH = cih - p,
#          errorL = p - cil) %>%
#   ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
#     geom_errorbarh(aes(xmin = preds - errorL, xmax = preds + errorH)) +
#     geom_errorbar(aes(ymin = cil, ymax = cih)) + 
#     xlim(c(0,1)) + ylim(c(0,1)) + 
#     facet_wrap(~depth) +
#     ylab("Proportion choosing target") + 
#     xlab("Model Predictions") + 
#     geom_smooth(method="lm", aes(group=1)) + 
#     geom_abline(slope = 1, intercept = 0, lty=2)